## /Users/aviral/projects/envtracing-paper/data/corpus-sloc.fst
## /Users/aviral/projects/envtracing-paper/data/native_env_first.fst
## /Users/aviral/projects/envtracing-paper/data/native_env_second.fst
## /Users/aviral/projects/envtracing-paper/data/native_env_third.fst

0.1 Table Structure

# Restrict the traced call stack to the three locking primitives analysed
# below: lockEnvironment(), lockBinding() and unlockBinding().
LOCK_UNLOCK_RAW <- filter(
    call_stack,
    fun_name %in% c("lockEnvironment", "lockBinding", "unlockBinding")
)
## /Users/aviral/projects/envtracing-paper/data/call_stack.fst
# Show the column layout of the filtered trace.
str(LOCK_UNLOCK_RAW)
## 'data.frame':    343359 obs. of  32 variables:
##  $ type                : chr  "example" "example" "example" "example" ...
##  $ package             : chr  "abind" "abind" "abind" "abind" ...
##  $ filename            : chr  "abind" "abind" "abind" "acorn" ...
##  $ depth               : int  13 14 14 13 14 14 13 14 14 13 ...
##  $ fun_name            : chr  "lockEnvironment" "lockEnvironment" "lockEnvironment" "lockEnvironment" ...
##  $ result_env_type     : chr  "NULL" "NULL" "NULL" "NULL" ...
##  $ result_env_qual_name: chr  NA NA NA NA ...
##  $ arg_env_type_1      : chr  "environment" "environment" "environment" "environment" ...
##  $ arg_env_qual_name_1 : chr  "NamedEnv::abind" "NamedEnv::abind" "NamedEnv::NA" "NamedEnv::abind" ...
##  $ arg_env_type_2      : chr  NA NA NA NA ...
##  $ arg_env_qual_name_2 : chr  NA NA NA NA ...
##  $ env_name            : chr  NA NA NA NA ...
##  $ symbol              : chr  NA NA NA NA ...
##  $ bindings            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ fun_type            : chr  NA NA NA NA ...
##  $ fun_qual_name       : chr  NA NA NA NA ...
##  $ n_type              : chr  NA NA NA NA ...
##  $ n                   : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ which_type          : chr  NA NA NA NA ...
##  $ which               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x_type              : chr  NA NA NA NA ...
##  $ x_int               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ x_char              : chr  NA NA NA NA ...
##  $ seq_env_qual_name   : chr  NA NA NA NA ...
##  $ se_env_qual_name    : chr  NA NA NA NA ...
##  $ se_val_type         : chr  NA NA NA NA ...
##  $ call_expr           : chr  "lockEnvironment(env, TRUE)" "lockEnvironment(ns, TRUE)" "lockEnvironment(parent.env(ns), TRUE)" "lockEnvironment(env, TRUE)" ...
##  $ qual_name_1         : chr  "base*$#$*lockEnvironment" "base*$#$*lockEnvironment" "base*$#$*lockEnvironment" "base*$#$*lockEnvironment" ...
##  $ qual_name_2         : chr  "base*$#$*attachNamespace" "base*$#$*loadNamespace*$#$*sealNamespace" "base*$#$*loadNamespace*$#$*sealNamespace" "base*$#$*attachNamespace" ...
##  $ qual_name_3         : chr  "base*$#$*library" "base*$#$*loadNamespace" "base*$#$*loadNamespace" "base*$#$*library" ...
##  $ qual_name_4         : chr  NA NA NA NA ...
##  $ count               : int  1 1 1 1 1 1 1 1 1 1 ...

0.2 Summary

# Weighted tally per locking function: the raw `count` column is summed for
# each `fun_name`, and the result is ordered from most to least frequent.
LOCK_UNLOCK_SUMMARY <- count(
    LOCK_UNLOCK_RAW,
    fun_name,
    wt = count,
    name = "count",
    sort = TRUE
)

0.3 lockEnvironment

0.3.1 Raw

# Per-call-site breakdown of lockEnvironment() calls, most frequent first.
LOCK_ENV_TABLE <-
    LOCK_UNLOCK_RAW %>%
    filter(fun_name == "lockEnvironment") %>%
    mutate(
        # Where the call was observed: package, then run type and file name.
        source = paste0(package, "*$#$*", type, "/", filename),
        # Normalise the immediate caller: every helper nested inside
        # base::loadNamespace is attributed to sealNamespace, and the two
        # "<NA>"-qualified callers are attributed to R6.
        qual_name_2 = case_when(
            str_starts(qual_name_2, fixed("base*$#$*loadNamespace*$#$*")) &
                qual_name_2 != "base*$#$*loadNamespace*$#$*sealNamespace" ~
                "base*$#$*loadNamespace*$#$*sealNamespace",
            qual_name_2 == "<NA>*$#$*2ae04899b56d2a4922d9088b16dca9f5493c3ceb37dabbfb0d6625d23af95e73e9e212ac86b83f1445eacfdbd3fa81b3a661bb7f3c5da3fa04debfa085395681" ~
                "R6*$#$*clone_method",
            qual_name_2 == "<NA>*$#$*new" ~
                "R6*$#$*new",
            TRUE ~
                qual_name_2
        ),
        # The first "*$#$*"-separated component is the package name.
        pack_name = map_chr(
            str_split(qual_name_2, fixed("*$#$*")),
            function(parts) parts[1]
        ),
        arg_env_pack_name = map_chr(
            str_split(arg_env_qual_name_1, fixed("*$#$*")),
            function(parts) parts[1]
        ),
        # Callers whose name contains "/" are labelled top-level; the rest
        # are split by membership in CORE_PACKAGES.
        category = case_when(
            str_detect(qual_name_2, fixed("/")) ~ "Top-Level",
            pack_name %in% CORE_PACKAGES ~ "Core",
            TRUE ~ "User"
        )
    ) %>%
    count(
        fun_name, source, call_expr, category, pack_name,
        qual_name_2, qual_name_3, qual_name_4,
        arg_env_pack_name, arg_env_qual_name = arg_env_qual_name_1,
        wt = count, name = "calls", sort = TRUE
    )

datatable(LOCK_ENV_TABLE)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

0.4 Summary

# Roll the lockEnvironment call sites up by caller category: total calls,
# number of distinct caller packages and functions, and their names.
LOCK_ENV_SUMMARY <-
    LOCK_ENV_TABLE %>%
    group_by(fun_name, category) %>%
    summarize(calls = sum(calls),
              packages = n_distinct(pack_name),
              functions = n_distinct(qual_name_2),
              pack_names = paste(unique(pack_name), collapse = ", "),
              fun_names = paste(unique(qual_name_2), collapse = ", "),
              # Drop the grouping here instead of relying on the implicit
              # "grouped by fun_name" result plus a separate ungroup(); this
              # also silences the summarise() regrouping message.
              .groups = "drop") %>%
    # Share of all lockEnvironment calls, in percent with two decimals.
    mutate(call_perc = round(100 * calls / sum(calls), 2)) %>%
    arrange(desc(calls))
datatable(LOCK_ENV_SUMMARY)
# Total lockEnvironment calls whose normalised caller belongs to rlang.
LockEnvironmentRlangCallCount <- sum(
    pull(filter(LOCK_ENV_TABLE, pack_name == "rlang"), calls)
)
print(LockEnvironmentRlangCallCount)
## [1] 5
# Hand the count to MacGen under the name "LockEnvironmentRlangCallCount".
MacGen$from_vectors(
    "LockEnvironmentRlangCallCount",
    LockEnvironmentRlangCallCount
)
## [1] "\\LockEnvironmentRlangCallCount"

0.5 lockBinding

0.5.1 Raw

# Per-call-site breakdown of lockBinding() calls, most frequent first.
LOCK_BINDING_TABLE <-
    LOCK_UNLOCK_RAW %>%
    filter(fun_name == "lockBinding") %>%
    mutate(
        # Where the call was observed: package, then run type and file name.
        source = paste0(package, "*$#$*", type, "/", filename),
        # Normalise the immediate caller: methods-package callers other than
        # .assignOverBinding/.setDummyField are attributed to
        # .initForEnvRefClass, the "<NA>"-qualified callers to R6, and
        # base::lapply to rlang::env_binding_lock.
        qual_name_2 = case_when(
            str_starts(qual_name_2, fixed("methods*$#$*")) &
                !(qual_name_2 %in% c("methods*$#$*.assignOverBinding", "methods*$#$*.setDummyField")) ~
                "methods*$#$*.initForEnvRefClass",
            qual_name_2 == "<NA>*$#$*2ae04899b56d2a4922d9088b16dca9f5493c3ceb37dabbfb0d6625d23af95e73e9e212ac86b83f1445eacfdbd3fa81b3a661bb7f3c5da3fa04debfa085395681*$#$*copy_slice" ~
                "R6*$#$*generator_funs*$#$*clone_method",
            qual_name_2 == "<NA>*$#$*new" ~
                "R6*$#$*new",
            qual_name_2 == "base*$#$*lapply" ~
                "rlang*$#$*env_binding_lock",
            TRUE ~
                qual_name_2
        ),
        # The first "*$#$*"-separated component is the package name.
        pack_name = map_chr(
            str_split(qual_name_2, fixed("*$#$*")),
            function(parts) parts[1]
        ),
        arg_env_pack_name = map_chr(
            str_split(arg_env_qual_name_1, fixed("*$#$*")),
            function(parts) parts[1]
        ),
        # Callers whose name contains "/" are labelled top-level; the rest
        # are split by membership in CORE_PACKAGES.
        category = case_when(
            str_detect(qual_name_2, fixed("/")) ~ "Top-Level",
            pack_name %in% CORE_PACKAGES ~ "Core",
            TRUE ~ "User"
        )
    ) %>%
    count(
        fun_name, source, call_expr, category, pack_name,
        qual_name_2, qual_name_3, qual_name_4,
        arg_env_pack_name, arg_env_qual_name = arg_env_qual_name_1,
        wt = count, name = "calls", sort = TRUE
    )

datatable(LOCK_BINDING_TABLE)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html

0.6 Summary

# Roll the lockBinding call sites up by caller category: total calls,
# number of distinct caller packages and functions, and their names.
LOCK_BINDING_SUMMARY <-
    LOCK_BINDING_TABLE %>%
    group_by(fun_name, category) %>%
    summarize(calls = sum(calls),
              packages = n_distinct(pack_name),
              functions = n_distinct(qual_name_2),
              pack_names = paste(unique(pack_name), collapse = ", "),
              fun_names = paste(unique(qual_name_2), collapse = ", "),
              # Drop the grouping here instead of relying on the implicit
              # "grouped by fun_name" result plus a separate ungroup(); this
              # also silences the summarise() regrouping message.
              .groups = "drop") %>%
    # Share of all lockBinding calls, in percent with two decimals.
    mutate(call_perc = round(100 * calls / sum(calls), 2)) %>%
    arrange(desc(calls))
datatable(LOCK_BINDING_SUMMARY)
# Total lockBinding calls whose normalised caller belongs to R6.
LockBindingRSixCallCount <- sum(
    pull(filter(LOCK_BINDING_TABLE, pack_name == "R6"), calls)
)
print(LockBindingRSixCallCount)
## [1] 303407
# Hand MacGen the R6 share of all lockBinding calls, formatted as a
# LaTeX-sanitized percentage, under the name "LockBindingRSixCallPerc".
MacGen$from_vectors(
    "LockBindingRSixCallPerc",
    latex_sanitize(
        label_percent()(LockBindingRSixCallCount / sum(LOCK_BINDING_SUMMARY$calls))
    )
)
## [1] "\\LockBindingRSixCallPerc"

0.7 unlockBinding

0.7.1 Raw

# Per-call-site breakdown of unlockBinding() calls, most frequent first.
UNLOCK_BINDING_TABLE <-
    LOCK_UNLOCK_RAW %>%
    filter(fun_name == "unlockBinding") %>%
    mutate(
        # Where the call was observed: package, then run type and file name.
        source = paste0(package, "*$#$*", type, "/", filename),
        # Normalise the immediate caller: base::lapply is attributed to
        # rlang::env_binding_unlock, and base::eval is replaced by the next
        # caller name (qual_name_3).
        qual_name_2 = case_when(
            qual_name_2 == "base*$#$*lapply" ~ "rlang*$#$*env_binding_unlock",
            qual_name_2 == "base*$#$*eval" ~ qual_name_3,
            TRUE ~ qual_name_2
        ),
        # The first "*$#$*"-separated component is the package name.
        pack_name = map_chr(
            str_split(qual_name_2, fixed("*$#$*")),
            function(parts) parts[1]
        ),
        arg_env_pack_name = map_chr(
            str_split(arg_env_qual_name_1, fixed("*$#$*")),
            function(parts) parts[1]
        ),
        # Callers whose name contains "/" are labelled top-level; the rest
        # are split by membership in CORE_PACKAGES.
        category = case_when(
            str_detect(qual_name_2, fixed("/")) ~ "Top-Level",
            pack_name %in% CORE_PACKAGES ~ "Core",
            TRUE ~ "User"
        )
    ) %>%
    count(
        fun_name, source, call_expr, category, pack_name,
        qual_name_2, qual_name_3, qual_name_4,
        arg_env_pack_name, arg_env_qual_name = arg_env_qual_name_1,
        wt = count, name = "calls", sort = TRUE
    )

datatable(UNLOCK_BINDING_TABLE)

0.8 Summary

# Roll the unlockBinding call sites up by caller category: total calls,
# number of distinct caller packages and functions, and their names.
UNLOCK_BINDING_SUMMARY <-
    UNLOCK_BINDING_TABLE %>%
    group_by(fun_name, category) %>%
    summarize(calls = sum(calls),
              packages = n_distinct(pack_name),
              functions = n_distinct(qual_name_2),
              pack_names = paste(unique(pack_name), collapse = ", "),
              fun_names = paste(unique(qual_name_2), collapse = ", "),
              # Drop the grouping here instead of relying on the implicit
              # "grouped by fun_name" result plus a separate ungroup(); this
              # also silences the summarise() regrouping message.
              .groups = "drop") %>%
    # Share of all unlockBinding calls, in percent with two decimals.
    mutate(call_perc = round(100 * calls / sum(calls), 2)) %>%
    arrange(desc(calls))
datatable(UNLOCK_BINDING_SUMMARY)

0.9 Results

0.9.1 Combined Table

# Stack the three per-function summaries into the table used for the paper:
# one row per (function, caller category) with package/function/call counts
# and the call share, formatted for LaTeX.
ALL_SUMMARY <-
    bind_rows(LOCK_ENV_SUMMARY, LOCK_BINDING_SUMMARY, UNLOCK_BINDING_SUMMARY) %>%
    # Keep only the reported columns. select() is the verb for this;
    # summarize() returning one row per input row is deprecated in
    # dplyr >= 1.1.
    select(fun_name, category, packages, functions, calls, call_perc) %>%
    arrange(fun_name, category) %>%
    rename(PackCnt = packages, FunCnt = functions, CallCnt = calls, CallPerc = call_perc) %>%
    # NOTE(review): label_number_si() is deprecated in scales >= 1.2.0 in
    # favour of label_number(scale_cut = cut_short_scale()); left unchanged
    # here to keep the rendered numbers identical.
    mutate(CallCnt = label_number_si(accuracy = 0.1)(CallCnt),
           CallPerc = latex_sanitize(label_percent()(CallPerc / 100)))
    
    
# Upper-case the first character of each string in `x`, leaving the rest of
# every string untouched (e.g. "lockBinding" -> "LockBinding").
#
# x: character vector.
# Returns `x` with the first character of each element upper-cased.
capitalize_first <- function(x) {
  initial <- toupper(substr(x, start = 1L, stop = 1L))
  substr(x, start = 1L, stop = 1L) <- initial
  x
}

datatable(ALL_SUMMARY)
# Per-row macro-name prefix: capitalised function name followed by the caller
# category, e.g. "LockBindingCore". capitalize_first() is used instead of
# str_to_title() because the latter lower-cases the remainder of the word
# ("Lockbinding"), which would not match the camel-cased macro names.
prefix <- paste0(capitalize_first(ALL_SUMMARY$fun_name), ALL_SUMMARY$category)
# Hand the four reported columns to MacGen, one entry per row and column,
# named with the per-row prefix.
MacGen$from_df(ALL_SUMMARY,
               PackCnt,
               FunCnt,
               CallCnt,
               CallPerc,
               prefix = prefix)
##  [1] "\\LockBindingCorePackCnt"      "\\LockBindingUserPackCnt"     
##  [3] "\\LockEnvironmentCorePackCnt"  "\\LockEnvironmentUserPackCnt" 
##  [5] "\\UnlockBindingCorePackCnt"    "\\UnlockBindingUserPackCnt"   
##  [7] "\\LockBindingCoreFunCnt"       "\\LockBindingUserFunCnt"      
##  [9] "\\LockEnvironmentCoreFunCnt"   "\\LockEnvironmentUserFunCnt"  
## [11] "\\UnlockBindingCoreFunCnt"     "\\UnlockBindingUserFunCnt"    
## [13] "\\LockBindingCoreCallCnt"      "\\LockBindingUserCallCnt"     
## [15] "\\LockEnvironmentCoreCallCnt"  "\\LockEnvironmentUserCallCnt" 
## [17] "\\UnlockBindingCoreCallCnt"    "\\UnlockBindingUserCallCnt"   
## [19] "\\LockBindingCoreCallPerc"     "\\LockBindingUserCallPerc"    
## [21] "\\LockEnvironmentCoreCallPerc" "\\LockEnvironmentUserCallPerc"
## [23] "\\UnlockBindingCoreCallPerc"   "\\UnlockBindingUserCallPerc"